其他
Python 爬取疫情期间全球股市走向，笑不出来……
The following article is from 数据森麟 (author: 徐麟).
# Parse the index overview page. `html` is assigned earlier in the script
# (not shown here); it is decoded as UTF-8 before parsing.
bsObj = BeautifulSoup(html.decode('utf-8'), "html.parser")

# From every table carrying the listing classes, take the link found inside
# its first "name" cell. Each href is a site-relative path.
index_tables = bsObj.find_all(
    'table',
    attrs={'class': 'genTbl closedTbl crossRatesTbl elpTbl elp30'},
)
title = []
for index_table in index_tables:
    name_cell = index_table.find(
        'td', attrs={'class': 'bold left noWrap elp plusIconTd'}
    )
    title.append(name_cell.find('a').attrs['href'])

# Turn each relative path into the absolute URL of its historical-data page.
url_all = ['https://cn.investing.com' + href + '-historical-data' for href in title]

# Accumulator for the scraped rows, plus the positions (indices into
# url_all) of pages that could not be processed.
global_index = pd.DataFrame(columns=['date', 'country', 'value', 'rate'])
err_list = []
def _table_to_frame(table, country):
    """Convert one parsed <table> into a DataFrame of its data rows.

    The first <tr> is skipped (treated as the header row). For every
    remaining row, cell 0 becomes 'date', cell 1 becomes 'value' and
    cell 6 becomes 'rate'; `country` is repeated on every row.

    Raises IndexError if a data row has fewer than seven <td> cells.
    """
    rows = [tr.find_all('td') for tr in table.find_all('tr')[1:]]
    return pd.DataFrame({
        'date': [cells[0].text for cells in rows],
        'country': country,
        'value': [cells[1].text for cells in rows],
        'rate': [cells[6].text for cells in rows],
    })


# Download every historical-data page and collect its rows.
# `cookie` and `header` are defined elsewhere in the script (not shown here).
# DataFrame.append was removed in pandas 2.0, so the per-table frames are
# gathered in a list and concatenated once after the loop.
frames = [global_index]
for i, url in enumerate(url_all):
    try:
        # Without a timeout a stalled connection would block the whole run;
        # a timeout raises and is recorded like any other failure.
        html = requests.get(url, cookies=cookie, headers=header, timeout=30).content
        bsObj = BeautifulSoup(html.decode('utf-8'), "html.parser")
        # The label is the eighth newline-separated piece of text of the
        # first <div class="right"> on the page.
        country = bsObj.find('div', attrs={'class': 'right'}).text.split('\n')[7]
        tables = bsObj.find_all('table')
        # Second table first, then the first one: same order as the original
        # script. Rows from a table that parsed are kept even if the next
        # table of the same page fails.
        for table_idx in (1, 0):
            frames.append(_table_to_frame(tables[table_idx], country))
    except Exception:
        # Best-effort scraping: remember which URL failed and carry on.
        # Unlike a bare `except:`, Ctrl-C still interrupts the run.
        err_list.append(i)
        print('wrong'+str(i))
    else:
        print('right'+str(i))

global_index = pd.concat(frames, ignore_index=True)
global_index.to_excel('全球三月指数.xlsx')
最终获取的数据如下:
热 文 推 荐